#ifndef CUFFTDX_FFT_3125_FP64_FWD_PTX_HPP
#define CUFFTDX_FFT_3125_FP64_FWD_PTX_HPP



// Specialization 545 of cufftdx_private_function for double: machine-generated
// inline PTX that applies five radix-5 butterfly passes (5^5 = 3125) to the
// five complex values each thread holds in rmem[0..4]. The include guard names
// this file FFT_3125_FP64_FWD; the transform direction is taken from that name
// and is not otherwise visible here.
//
// Parameters:
//   rmem - five complex<double> values per thread; read as the inputs and
//          overwritten with the results of the last pass.
//   smem - 32-bit shared-memory address of the exchange scratch area. The code
//          addresses smem + 25000 * (tid.y + tid.x / 625), i.e. one
//          25000-byte slot (3125 doubles) per group of 625 tid.x values.
//
// What the PTX does:
//   * tid.x is split into slot = tid.x / 625 and lane = tid.x % 625 using
//     multiply-high division (the mul.wide.u32 / shr.u64 pairs); the same
//     trick later yields lane / 5, lane / 25 and lane / 125.
//   * After each of the first four passes the four non-trivial butterfly
//     outputs are multiplied by w, w*w, w3 and w*w3, where w and w3 are two
//     complex entries loaded from lut_dp_5_3125, lut_dp_5_625, lut_dp_5_125
//     and lut_dp_5_25 respectively (16-byte entries indexed by lane, lane/5,
//     lane/25, lane/125).
//   * Values are redistributed between threads through shared memory. Real
//     parts and imaginary parts are exchanged in two separate store/load
//     rounds per pass, each fenced by barrier.sync 0 (16 barriers in total),
//     so every thread of the block has to execute this function together.
//   * The fifth pass has no twiddles and writes straight to the outputs.
//   * The f64 immediates are approximately 0.3090170, 0.8090170, 0.9510565 and
//     0.5877853, which match cos(2*pi/5), -cos(4*pi/5), sin(2*pi/5) and
//     sin(4*pi/5).
//
// Inline-asm notes:
//   * Inputs %18 and %21 repeat rmem[1].y and rmem[2].y and are never
//     referenced by the PTX (it uses %19 and %22). They must stay in place:
//     removing them would renumber every later operand.
//   * The "memory" clobber tells the compiler that the statement reads and
//     writes memory it cannot see through the operand list (shared memory via
//     smem, global memory via the LUT pointers), so it must not cache or move
//     memory accesses across it.
// NOTE(review): assumes the scratch area is at least 8-byte aligned and large
// enough for every slot addressed above - confirm against the caller.
template<> __forceinline__ __device__ void cufftdx_private_function<545, double, 1>(cufftdx::detail::complex<double> *rmem, unsigned smem){

asm volatile (R"({
.reg .b32 r<30>;
.reg .f64 fd<433>;
.reg .b64 rd<22>;
mov.u32 r1, %tid.y;
mov.u32 r2, %10;
mad.lo.s32 r3, r1, 25000, r2;
mov.u32 r4, %tid.x;
add.f64 fd21, %17, %25;
add.f64 fd22, %15, fd21;
add.f64 fd23, %20, %23;
add.f64 fd24, fd23, fd22;
add.f64 fd25, %19, %26;
add.f64 fd26, %16, fd25;
add.f64 fd27, %22, %24;
add.f64 fd28, fd27, fd26;
fma.rn.f64 fd29, fd21, 0d3FD3C6EF372FE950, %15;
mul.f64 fd30, fd23, 0d3FE9E3779B97F4A8;
sub.f64 fd31, fd29, fd30;
sub.f64 fd32, %19, %26;
mul.f64 fd33, fd32, 0d3FEE6F0E134454FF;
sub.f64 fd34, %22, %24;
mul.f64 fd35, fd34, 0dBFE2CF2304755A5E;
sub.f64 fd36, fd35, fd33;
sub.f64 fd37, fd31, fd36;
add.f64 fd38, fd36, fd31;
mul.f64 fd39, fd21, 0d3FE9E3779B97F4A8;
sub.f64 fd40, %15, fd39;
fma.rn.f64 fd41, fd23, 0d3FD3C6EF372FE950, fd40;
mul.f64 fd42, fd32, 0d3FE2CF2304755A5E;
mul.f64 fd43, fd34, 0d3FEE6F0E134454FF;
sub.f64 fd44, fd43, fd42;
sub.f64 fd45, fd41, fd44;
add.f64 fd46, fd44, fd41;
fma.rn.f64 fd47, fd25, 0d3FD3C6EF372FE950, %16;
mul.f64 fd48, fd27, 0d3FE9E3779B97F4A8;
sub.f64 fd49, fd47, fd48;
sub.f64 fd50, %17, %25;
mul.f64 fd51, fd50, 0d3FEE6F0E134454FF;
sub.f64 fd52, %20, %23;
mul.f64 fd53, fd52, 0dBFE2CF2304755A5E;
sub.f64 fd54, fd53, fd51;
add.f64 fd55, fd54, fd49;
sub.f64 fd56, fd49, fd54;
mul.f64 fd57, fd25, 0d3FE9E3779B97F4A8;
sub.f64 fd58, %16, fd57;
fma.rn.f64 fd59, fd27, 0d3FD3C6EF372FE950, fd58;
mul.f64 fd60, fd50, 0d3FE2CF2304755A5E;
mul.f64 fd61, fd52, 0d3FEE6F0E134454FF;
sub.f64 fd62, fd61, fd60;
add.f64 fd63, fd62, fd59;
sub.f64 fd64, fd59, fd62;
mul.wide.u32 rd2, r4, -776530087;
shr.u64 rd3, rd2, 41;
cvt.u32.u64 r5, rd3;
mul.lo.s32 r6, r5, 625;
sub.s32 r7, r4, r6;
mul.wide.u32 rd4, r7, 16;
mov.u64 rd5, %11;
add.s64 rd6, rd5, rd4;
ld.global.v2.f64 {fd65, fd66}, [rd6];
mul.f64 fd69, fd65, fd37;
mul.f64 fd70, fd66, fd55;
sub.f64 fd71, fd69, fd70;
mul.f64 fd72, fd65, fd55;
fma.rn.f64 fd73, fd66, fd37, fd72;
mul.f64 fd74, fd65, fd65;
mul.f64 fd75, fd66, fd66;
sub.f64 fd76, fd74, fd75;
mul.f64 fd77, fd66, fd65;
fma.rn.f64 fd78, fd66, fd65, fd77;
mul.f64 fd79, fd76, fd45;
mul.f64 fd80, fd78, fd63;
sub.f64 fd81, fd79, fd80;
mul.f64 fd82, fd76, fd63;
fma.rn.f64 fd83, fd78, fd45, fd82;
ld.global.v2.f64 {fd84, fd85}, [rd6+10000];
mul.f64 fd88, fd84, fd46;
mul.f64 fd89, fd85, fd64;
sub.f64 fd90, fd88, fd89;
mul.f64 fd91, fd84, fd64;
fma.rn.f64 fd92, fd85, fd46, fd91;
mul.f64 fd93, fd65, fd84;
mul.f64 fd94, fd66, fd85;
sub.f64 fd95, fd93, fd94;
mul.f64 fd96, fd65, fd85;
fma.rn.f64 fd97, fd66, fd84, fd96;
mul.f64 fd98, fd95, fd38;
mul.f64 fd99, fd97, fd56;
sub.f64 fd100, fd98, fd99;
mul.f64 fd101, fd95, fd56;
fma.rn.f64 fd102, fd97, fd38, fd101;
mad.lo.s32 r8, r5, 25000, r3;
barrier.sync 0;
mad.lo.s32 r9, r7, 40, r8;
st.shared.f64 [r9], fd24;
st.shared.f64 [r9+8], fd71;
st.shared.f64 [r9+16], fd81;
st.shared.f64 [r9+24], fd90;
st.shared.f64 [r9+32], fd100;
barrier.sync 0;
shl.b32 r10, r7, 5;
sub.s32 r11, r9, r10;
ld.shared.f64 fd103, [r11];
ld.shared.f64 fd104, [r11+5000];
ld.shared.f64 fd105, [r11+10000];
ld.shared.f64 fd106, [r11+15000];
ld.shared.f64 fd107, [r11+20000];
barrier.sync 0;
st.shared.f64 [r9], fd28;
st.shared.f64 [r9+8], fd73;
st.shared.f64 [r9+16], fd83;
st.shared.f64 [r9+24], fd92;
st.shared.f64 [r9+32], fd102;
barrier.sync 0;
ld.shared.f64 fd108, [r11];
ld.shared.f64 fd109, [r11+5000];
ld.shared.f64 fd110, [r11+10000];
ld.shared.f64 fd111, [r11+15000];
ld.shared.f64 fd112, [r11+20000];
add.f64 fd113, fd104, fd107;
add.f64 fd114, fd103, fd113;
add.f64 fd115, fd105, fd106;
add.f64 fd116, fd115, fd114;
add.f64 fd117, fd109, fd112;
add.f64 fd118, fd108, fd117;
add.f64 fd119, fd110, fd111;
add.f64 fd120, fd119, fd118;
fma.rn.f64 fd121, fd113, 0d3FD3C6EF372FE950, fd103;
mul.f64 fd122, fd115, 0d3FE9E3779B97F4A8;
sub.f64 fd123, fd121, fd122;
sub.f64 fd124, fd109, fd112;
mul.f64 fd125, fd124, 0d3FEE6F0E134454FF;
sub.f64 fd126, fd110, fd111;
mul.f64 fd127, fd126, 0dBFE2CF2304755A5E;
sub.f64 fd128, fd127, fd125;
sub.f64 fd129, fd123, fd128;
add.f64 fd130, fd128, fd123;
mul.f64 fd131, fd113, 0d3FE9E3779B97F4A8;
sub.f64 fd132, fd103, fd131;
fma.rn.f64 fd133, fd115, 0d3FD3C6EF372FE950, fd132;
mul.f64 fd134, fd124, 0d3FE2CF2304755A5E;
mul.f64 fd135, fd126, 0d3FEE6F0E134454FF;
sub.f64 fd136, fd135, fd134;
sub.f64 fd137, fd133, fd136;
add.f64 fd138, fd136, fd133;
fma.rn.f64 fd139, fd117, 0d3FD3C6EF372FE950, fd108;
mul.f64 fd140, fd119, 0d3FE9E3779B97F4A8;
sub.f64 fd141, fd139, fd140;
sub.f64 fd142, fd104, fd107;
mul.f64 fd143, fd142, 0d3FEE6F0E134454FF;
sub.f64 fd144, fd105, fd106;
mul.f64 fd145, fd144, 0dBFE2CF2304755A5E;
sub.f64 fd146, fd145, fd143;
add.f64 fd147, fd146, fd141;
sub.f64 fd148, fd141, fd146;
mul.f64 fd149, fd117, 0d3FE9E3779B97F4A8;
sub.f64 fd150, fd108, fd149;
fma.rn.f64 fd151, fd119, 0d3FD3C6EF372FE950, fd150;
mul.f64 fd152, fd142, 0d3FE2CF2304755A5E;
mul.f64 fd153, fd144, 0d3FEE6F0E134454FF;
sub.f64 fd154, fd153, fd152;
add.f64 fd155, fd154, fd151;
sub.f64 fd156, fd151, fd154;
mul.wide.u32 rd7, r7, -858993459;
shr.u64 rd8, rd7, 34;
cvt.u32.u64 r12, rd8;
mul.lo.s32 r13, r12, 5;
sub.s32 r14, r7, r13;
mul.wide.u32 rd9, r12, 16;
mov.u64 rd10, %12;
add.s64 rd11, rd10, rd9;
ld.global.v2.f64 {fd157, fd158}, [rd11];
mul.f64 fd161, fd157, fd129;
mul.f64 fd162, fd158, fd147;
sub.f64 fd163, fd161, fd162;
mul.f64 fd164, fd157, fd147;
fma.rn.f64 fd165, fd158, fd129, fd164;
mul.f64 fd166, fd157, fd157;
mul.f64 fd167, fd158, fd158;
sub.f64 fd168, fd166, fd167;
mul.f64 fd169, fd158, fd157;
fma.rn.f64 fd170, fd158, fd157, fd169;
mul.f64 fd171, fd168, fd137;
mul.f64 fd172, fd170, fd155;
sub.f64 fd173, fd171, fd172;
mul.f64 fd174, fd168, fd155;
fma.rn.f64 fd175, fd170, fd137, fd174;
ld.global.v2.f64 {fd176, fd177}, [rd11+2000];
mul.f64 fd180, fd176, fd138;
mul.f64 fd181, fd177, fd156;
sub.f64 fd182, fd180, fd181;
mul.f64 fd183, fd176, fd156;
fma.rn.f64 fd184, fd177, fd138, fd183;
mul.f64 fd185, fd157, fd176;
mul.f64 fd186, fd158, fd177;
sub.f64 fd187, fd185, fd186;
mul.f64 fd188, fd157, fd177;
fma.rn.f64 fd189, fd158, fd176, fd188;
mul.f64 fd190, fd187, fd130;
mul.f64 fd191, fd189, fd148;
sub.f64 fd192, fd190, fd191;
mul.f64 fd193, fd187, fd148;
fma.rn.f64 fd194, fd189, fd130, fd193;
shl.b32 r15, r14, 3;
add.s32 r16, r8, r15;
barrier.sync 0;
mad.lo.s32 r17, r12, 200, r16;
st.shared.f64 [r17], fd116;
st.shared.f64 [r17+40], fd163;
st.shared.f64 [r17+80], fd173;
st.shared.f64 [r17+120], fd182;
st.shared.f64 [r17+160], fd192;
barrier.sync 0;
ld.shared.f64 fd195, [r11];
ld.shared.f64 fd196, [r11+5000];
ld.shared.f64 fd197, [r11+10000];
ld.shared.f64 fd198, [r11+15000];
ld.shared.f64 fd199, [r11+20000];
barrier.sync 0;
st.shared.f64 [r17], fd120;
st.shared.f64 [r17+40], fd165;
st.shared.f64 [r17+80], fd175;
st.shared.f64 [r17+120], fd184;
st.shared.f64 [r17+160], fd194;
barrier.sync 0;
ld.shared.f64 fd200, [r11];
ld.shared.f64 fd201, [r11+5000];
ld.shared.f64 fd202, [r11+10000];
ld.shared.f64 fd203, [r11+15000];
ld.shared.f64 fd204, [r11+20000];
add.f64 fd205, fd196, fd199;
add.f64 fd206, fd195, fd205;
add.f64 fd207, fd197, fd198;
add.f64 fd208, fd207, fd206;
add.f64 fd209, fd201, fd204;
add.f64 fd210, fd200, fd209;
add.f64 fd211, fd202, fd203;
add.f64 fd212, fd211, fd210;
fma.rn.f64 fd213, fd205, 0d3FD3C6EF372FE950, fd195;
mul.f64 fd214, fd207, 0d3FE9E3779B97F4A8;
sub.f64 fd215, fd213, fd214;
sub.f64 fd216, fd201, fd204;
mul.f64 fd217, fd216, 0d3FEE6F0E134454FF;
sub.f64 fd218, fd202, fd203;
mul.f64 fd219, fd218, 0dBFE2CF2304755A5E;
sub.f64 fd220, fd219, fd217;
sub.f64 fd221, fd215, fd220;
add.f64 fd222, fd220, fd215;
mul.f64 fd223, fd205, 0d3FE9E3779B97F4A8;
sub.f64 fd224, fd195, fd223;
fma.rn.f64 fd225, fd207, 0d3FD3C6EF372FE950, fd224;
mul.f64 fd226, fd216, 0d3FE2CF2304755A5E;
mul.f64 fd227, fd218, 0d3FEE6F0E134454FF;
sub.f64 fd228, fd227, fd226;
sub.f64 fd229, fd225, fd228;
add.f64 fd230, fd228, fd225;
fma.rn.f64 fd231, fd209, 0d3FD3C6EF372FE950, fd200;
mul.f64 fd232, fd211, 0d3FE9E3779B97F4A8;
sub.f64 fd233, fd231, fd232;
sub.f64 fd234, fd196, fd199;
mul.f64 fd235, fd234, 0d3FEE6F0E134454FF;
sub.f64 fd236, fd197, fd198;
mul.f64 fd237, fd236, 0dBFE2CF2304755A5E;
sub.f64 fd238, fd237, fd235;
add.f64 fd239, fd238, fd233;
sub.f64 fd240, fd233, fd238;
mul.f64 fd241, fd209, 0d3FE9E3779B97F4A8;
sub.f64 fd242, fd200, fd241;
fma.rn.f64 fd243, fd211, 0d3FD3C6EF372FE950, fd242;
mul.f64 fd244, fd234, 0d3FE2CF2304755A5E;
mul.f64 fd245, fd236, 0d3FEE6F0E134454FF;
sub.f64 fd246, fd245, fd244;
add.f64 fd247, fd246, fd243;
sub.f64 fd248, fd243, fd246;
mul.wide.u32 rd12, r7, 1374389535;
shr.u64 rd13, rd12, 35;
cvt.u32.u64 r18, rd13;
mul.lo.s32 r19, r18, 25;
sub.s32 r20, r7, r19;
mul.wide.u32 rd14, r18, 16;
mov.u64 rd15, %13;
add.s64 rd16, rd15, rd14;
ld.global.v2.f64 {fd249, fd250}, [rd16];
mul.f64 fd253, fd249, fd221;
mul.f64 fd254, fd250, fd239;
sub.f64 fd255, fd253, fd254;
mul.f64 fd256, fd249, fd239;
fma.rn.f64 fd257, fd250, fd221, fd256;
mul.f64 fd258, fd249, fd249;
mul.f64 fd259, fd250, fd250;
sub.f64 fd260, fd258, fd259;
mul.f64 fd261, fd250, fd249;
fma.rn.f64 fd262, fd250, fd249, fd261;
mul.f64 fd263, fd260, fd229;
mul.f64 fd264, fd262, fd247;
sub.f64 fd265, fd263, fd264;
mul.f64 fd266, fd260, fd247;
fma.rn.f64 fd267, fd262, fd229, fd266;
ld.global.v2.f64 {fd268, fd269}, [rd16+400];
mul.f64 fd272, fd268, fd230;
mul.f64 fd273, fd269, fd248;
sub.f64 fd274, fd272, fd273;
mul.f64 fd275, fd268, fd248;
fma.rn.f64 fd276, fd269, fd230, fd275;
mul.f64 fd277, fd249, fd268;
mul.f64 fd278, fd250, fd269;
sub.f64 fd279, fd277, fd278;
mul.f64 fd280, fd249, fd269;
fma.rn.f64 fd281, fd250, fd268, fd280;
mul.f64 fd282, fd279, fd222;
mul.f64 fd283, fd281, fd240;
sub.f64 fd284, fd282, fd283;
mul.f64 fd285, fd279, fd240;
fma.rn.f64 fd286, fd281, fd222, fd285;
shl.b32 r21, r20, 3;
add.s32 r22, r8, r21;
barrier.sync 0;
mad.lo.s32 r23, r18, 1000, r22;
st.shared.f64 [r23], fd208;
st.shared.f64 [r23+200], fd255;
st.shared.f64 [r23+400], fd265;
st.shared.f64 [r23+600], fd274;
st.shared.f64 [r23+800], fd284;
barrier.sync 0;
ld.shared.f64 fd287, [r11];
ld.shared.f64 fd288, [r11+5000];
ld.shared.f64 fd289, [r11+10000];
ld.shared.f64 fd290, [r11+15000];
ld.shared.f64 fd291, [r11+20000];
barrier.sync 0;
st.shared.f64 [r23], fd212;
st.shared.f64 [r23+200], fd257;
st.shared.f64 [r23+400], fd267;
st.shared.f64 [r23+600], fd276;
st.shared.f64 [r23+800], fd286;
barrier.sync 0;
ld.shared.f64 fd292, [r11];
ld.shared.f64 fd293, [r11+5000];
ld.shared.f64 fd294, [r11+10000];
ld.shared.f64 fd295, [r11+15000];
ld.shared.f64 fd296, [r11+20000];
add.f64 fd297, fd288, fd291;
add.f64 fd298, fd287, fd297;
add.f64 fd299, fd289, fd290;
add.f64 fd300, fd299, fd298;
add.f64 fd301, fd293, fd296;
add.f64 fd302, fd292, fd301;
add.f64 fd303, fd294, fd295;
add.f64 fd304, fd303, fd302;
fma.rn.f64 fd305, fd297, 0d3FD3C6EF372FE950, fd287;
mul.f64 fd306, fd299, 0d3FE9E3779B97F4A8;
sub.f64 fd307, fd305, fd306;
sub.f64 fd308, fd293, fd296;
mul.f64 fd309, fd308, 0d3FEE6F0E134454FF;
sub.f64 fd310, fd294, fd295;
mul.f64 fd311, fd310, 0dBFE2CF2304755A5E;
sub.f64 fd312, fd311, fd309;
sub.f64 fd313, fd307, fd312;
add.f64 fd314, fd312, fd307;
mul.f64 fd315, fd297, 0d3FE9E3779B97F4A8;
sub.f64 fd316, fd287, fd315;
fma.rn.f64 fd317, fd299, 0d3FD3C6EF372FE950, fd316;
mul.f64 fd318, fd308, 0d3FE2CF2304755A5E;
mul.f64 fd319, fd310, 0d3FEE6F0E134454FF;
sub.f64 fd320, fd319, fd318;
sub.f64 fd321, fd317, fd320;
add.f64 fd322, fd320, fd317;
fma.rn.f64 fd323, fd301, 0d3FD3C6EF372FE950, fd292;
mul.f64 fd324, fd303, 0d3FE9E3779B97F4A8;
sub.f64 fd325, fd323, fd324;
sub.f64 fd326, fd288, fd291;
mul.f64 fd327, fd326, 0d3FEE6F0E134454FF;
sub.f64 fd328, fd289, fd290;
mul.f64 fd329, fd328, 0dBFE2CF2304755A5E;
sub.f64 fd330, fd329, fd327;
add.f64 fd331, fd330, fd325;
sub.f64 fd332, fd325, fd330;
mul.f64 fd333, fd301, 0d3FE9E3779B97F4A8;
sub.f64 fd334, fd292, fd333;
fma.rn.f64 fd335, fd303, 0d3FD3C6EF372FE950, fd334;
mul.f64 fd336, fd326, 0d3FE2CF2304755A5E;
mul.f64 fd337, fd328, 0d3FEE6F0E134454FF;
sub.f64 fd338, fd337, fd336;
add.f64 fd339, fd338, fd335;
sub.f64 fd340, fd335, fd338;
mul.wide.u32 rd17, r7, 274877907;
shr.u64 rd18, rd17, 35;
cvt.u32.u64 r24, rd18;
mul.lo.s32 r25, r24, 125;
sub.s32 r26, r7, r25;
mul.wide.u32 rd19, r24, 16;
mov.u64 rd20, %14;
add.s64 rd21, rd20, rd19;
ld.global.v2.f64 {fd341, fd342}, [rd21];
mul.f64 fd345, fd341, fd313;
mul.f64 fd346, fd342, fd331;
sub.f64 fd347, fd345, fd346;
mul.f64 fd348, fd341, fd331;
fma.rn.f64 fd349, fd342, fd313, fd348;
mul.f64 fd350, fd341, fd341;
mul.f64 fd351, fd342, fd342;
sub.f64 fd352, fd350, fd351;
mul.f64 fd353, fd342, fd341;
fma.rn.f64 fd354, fd342, fd341, fd353;
mul.f64 fd355, fd352, fd321;
mul.f64 fd356, fd354, fd339;
sub.f64 fd357, fd355, fd356;
mul.f64 fd358, fd352, fd339;
fma.rn.f64 fd359, fd354, fd321, fd358;
ld.global.v2.f64 {fd360, fd361}, [rd21+80];
mul.f64 fd364, fd360, fd322;
mul.f64 fd365, fd361, fd340;
sub.f64 fd366, fd364, fd365;
mul.f64 fd367, fd360, fd340;
fma.rn.f64 fd368, fd361, fd322, fd367;
mul.f64 fd369, fd341, fd360;
mul.f64 fd370, fd342, fd361;
sub.f64 fd371, fd369, fd370;
mul.f64 fd372, fd341, fd361;
fma.rn.f64 fd373, fd342, fd360, fd372;
mul.f64 fd374, fd371, fd314;
mul.f64 fd375, fd373, fd332;
sub.f64 fd376, fd374, fd375;
mul.f64 fd377, fd371, fd332;
fma.rn.f64 fd378, fd373, fd314, fd377;
shl.b32 r27, r26, 3;
add.s32 r28, r8, r27;
barrier.sync 0;
mad.lo.s32 r29, r24, 5000, r28;
st.shared.f64 [r29], fd300;
st.shared.f64 [r29+1000], fd347;
st.shared.f64 [r29+2000], fd357;
st.shared.f64 [r29+3000], fd366;
st.shared.f64 [r29+4000], fd376;
barrier.sync 0;
ld.shared.f64 fd379, [r11];
ld.shared.f64 fd380, [r11+5000];
ld.shared.f64 fd381, [r11+10000];
ld.shared.f64 fd382, [r11+15000];
ld.shared.f64 fd383, [r11+20000];
barrier.sync 0;
st.shared.f64 [r29], fd304;
st.shared.f64 [r29+1000], fd349;
st.shared.f64 [r29+2000], fd359;
st.shared.f64 [r29+3000], fd368;
st.shared.f64 [r29+4000], fd378;
barrier.sync 0;
ld.shared.f64 fd384, [r11];
ld.shared.f64 fd385, [r11+5000];
ld.shared.f64 fd386, [r11+10000];
ld.shared.f64 fd387, [r11+15000];
ld.shared.f64 fd388, [r11+20000];
add.f64 fd389, fd380, fd383;
add.f64 fd390, fd379, fd389;
add.f64 fd391, fd381, fd382;
add.f64 fd392, fd385, fd388;
add.f64 fd393, fd384, fd392;
add.f64 fd394, fd386, fd387;
fma.rn.f64 fd395, fd389, 0d3FD3C6EF372FE950, fd379;
mul.f64 fd396, fd391, 0d3FE9E3779B97F4A8;
sub.f64 fd397, fd395, fd396;
sub.f64 fd398, fd385, fd388;
mul.f64 fd399, fd398, 0d3FEE6F0E134454FF;
sub.f64 fd400, fd386, fd387;
mul.f64 fd401, fd400, 0dBFE2CF2304755A5E;
sub.f64 fd402, fd401, fd399;
mul.f64 fd403, fd389, 0d3FE9E3779B97F4A8;
sub.f64 fd404, fd379, fd403;
fma.rn.f64 fd405, fd391, 0d3FD3C6EF372FE950, fd404;
mul.f64 fd406, fd398, 0d3FE2CF2304755A5E;
mul.f64 fd407, fd400, 0d3FEE6F0E134454FF;
sub.f64 fd408, fd407, fd406;
fma.rn.f64 fd409, fd392, 0d3FD3C6EF372FE950, fd384;
mul.f64 fd410, fd394, 0d3FE9E3779B97F4A8;
sub.f64 fd411, fd409, fd410;
sub.f64 fd412, fd380, fd383;
mul.f64 fd413, fd412, 0d3FEE6F0E134454FF;
sub.f64 fd414, fd381, fd382;
mul.f64 fd415, fd414, 0dBFE2CF2304755A5E;
sub.f64 fd416, fd415, fd413;
mul.f64 fd417, fd392, 0d3FE9E3779B97F4A8;
sub.f64 fd418, fd384, fd417;
fma.rn.f64 fd419, fd394, 0d3FD3C6EF372FE950, fd418;
mul.f64 fd420, fd412, 0d3FE2CF2304755A5E;
mul.f64 fd421, fd414, 0d3FEE6F0E134454FF;
sub.f64 fd422, fd421, fd420;
add.f64 %0, fd391, fd390;
add.f64 %1, fd394, fd393;
add.f64 %3, fd416, fd411;
sub.f64 %2, fd397, fd402;
sub.f64 %4, fd405, fd408;
add.f64 %5, fd422, fd419;
add.f64 %6, fd408, fd405;
sub.f64 %7, fd419, fd422;
sub.f64 %9, fd411, fd416;
add.f64 %8, fd402, fd397;
})"
     : "=d"(rmem[0].x), "=d"(rmem[0].y), "=d"(rmem[1].x), "=d"(rmem[1].y), "=d"(rmem[2].x), "=d"(rmem[2].y), "=d"(rmem[3].x), "=d"(rmem[3].y), "=d"(rmem[4].x), "=d"(rmem[4].y): "r"(smem), "l"(lut_dp_5_3125), "l"(lut_dp_5_625), "l"(lut_dp_5_125), "l"(lut_dp_5_25), "d"(rmem[0].x), "d"(rmem[0].y), "d"(rmem[1].x), "d"(rmem[1].y), "d"(rmem[1].y), "d"(rmem[2].x), "d"(rmem[2].y), "d"(rmem[2].y), "d"(rmem[3].x), "d"(rmem[3].y), "d"(rmem[4].x), "d"(rmem[4].y)
     : "memory");
}




// Specialization 544 of cufftdx_private_function for double: machine-generated
// inline PTX that applies five radix-5 butterfly passes (5^5 = 3125) to the
// five complex values each thread holds in rmem[0..4]. The include guard names
// this file FFT_3125_FP64_FWD; the transform direction is taken from that name
// and is not otherwise visible here.
//
// Parameters:
//   rmem - five complex<double> values per thread; read as the inputs and
//          overwritten with the results of the last pass.
//   smem - 32-bit shared-memory address of the exchange scratch area. The code
//          addresses smem + 50000 * (tid.y + tid.x / 625), i.e. one
//          50000-byte slot (3125 complex doubles) per group of 625 tid.x
//          values.
//
// What the PTX does:
//   * tid.x is split into slot = tid.x / 625 and lane = tid.x % 625 using
//     multiply-high division (the mul.wide.u32 / shr.u64 pairs); the same
//     trick later yields lane / 5, lane / 25 and lane / 125.
//   * After each of the first four passes the four non-trivial butterfly
//     outputs are multiplied by w, w*w, w3 and w*w3, where w and w3 are two
//     complex entries loaded from lut_dp_5_3125, lut_dp_5_625, lut_dp_5_125
//     and lut_dp_5_25 respectively (16-byte entries indexed by lane, lane/5,
//     lane/25, lane/125).
//   * Values are redistributed between threads through shared memory as
//     16-byte (re, im) pairs using v2.f64 stores and loads, one exchange per
//     pass. Each exchange is bracketed by barrier.sync 0 (8 barriers in
//     total), so every thread of the block has to execute this function
//     together.
//   * The fifth pass has no twiddles and writes straight to the outputs.
//   * The f64 immediates are approximately 0.3090170, 0.8090170, 0.9510565 and
//     0.5877853, which match cos(2*pi/5), -cos(4*pi/5), sin(2*pi/5) and
//     sin(4*pi/5).
//
// Inline-asm notes:
//   * Inputs %18 and %21 repeat rmem[1].y and rmem[2].y and are never
//     referenced by the PTX (it uses %19 and %22). They must stay in place:
//     removing them would renumber every later operand.
//   * The "memory" clobber tells the compiler that the statement reads and
//     writes memory it cannot see through the operand list (shared memory via
//     smem, global memory via the LUT pointers), so it must not cache or move
//     memory accesses across it.
// NOTE(review): the v2.f64 shared accesses need 16-byte-aligned addresses, so
// smem is assumed to be 16-byte aligned and the scratch area large enough for
// every slot addressed above - confirm against the caller.
template<> __forceinline__ __device__ void cufftdx_private_function<544, double, 1>(cufftdx::detail::complex<double> *rmem, unsigned smem){

asm volatile (R"({
.reg .b32 r<30>;
.reg .f64 fd<473>;
.reg .b64 rd<22>;
mov.u32 r1, %tid.y;
mov.u32 r2, %10;
mad.lo.s32 r3, r1, 50000, r2;
mov.u32 r4, %tid.x;
add.f64 fd21, %17, %25;
add.f64 fd22, %15, fd21;
add.f64 fd23, %20, %23;
add.f64 fd24, %19, %26;
add.f64 fd25, %16, fd24;
add.f64 fd26, %22, %24;
fma.rn.f64 fd27, fd21, 0d3FD3C6EF372FE950, %15;
mul.f64 fd28, fd23, 0d3FE9E3779B97F4A8;
sub.f64 fd29, fd27, fd28;
sub.f64 fd30, %19, %26;
mul.f64 fd31, fd30, 0d3FEE6F0E134454FF;
sub.f64 fd32, %22, %24;
mul.f64 fd33, fd32, 0dBFE2CF2304755A5E;
sub.f64 fd34, fd33, fd31;
sub.f64 fd35, fd29, fd34;
add.f64 fd36, fd34, fd29;
mul.f64 fd37, fd21, 0d3FE9E3779B97F4A8;
sub.f64 fd38, %15, fd37;
fma.rn.f64 fd39, fd23, 0d3FD3C6EF372FE950, fd38;
mul.f64 fd40, fd30, 0d3FE2CF2304755A5E;
mul.f64 fd41, fd32, 0d3FEE6F0E134454FF;
sub.f64 fd42, fd41, fd40;
sub.f64 fd43, fd39, fd42;
add.f64 fd44, fd42, fd39;
fma.rn.f64 fd45, fd24, 0d3FD3C6EF372FE950, %16;
mul.f64 fd46, fd26, 0d3FE9E3779B97F4A8;
sub.f64 fd47, fd45, fd46;
sub.f64 fd48, %17, %25;
mul.f64 fd49, fd48, 0d3FEE6F0E134454FF;
sub.f64 fd50, %20, %23;
mul.f64 fd51, fd50, 0dBFE2CF2304755A5E;
sub.f64 fd52, fd51, fd49;
add.f64 fd53, fd52, fd47;
sub.f64 fd54, fd47, fd52;
mul.f64 fd55, fd24, 0d3FE9E3779B97F4A8;
sub.f64 fd56, %16, fd55;
fma.rn.f64 fd57, fd26, 0d3FD3C6EF372FE950, fd56;
mul.f64 fd58, fd48, 0d3FE2CF2304755A5E;
mul.f64 fd59, fd50, 0d3FEE6F0E134454FF;
sub.f64 fd60, fd59, fd58;
add.f64 fd61, fd60, fd57;
sub.f64 fd62, fd57, fd60;
mul.wide.u32 rd2, r4, -776530087;
shr.u64 rd3, rd2, 41;
cvt.u32.u64 r5, rd3;
mul.lo.s32 r6, r5, 625;
sub.s32 r7, r4, r6;
mad.lo.s32 r8, r5, 50000, r3;
mul.wide.u32 rd4, r7, 16;
mov.u64 rd5, %11;
add.s64 rd6, rd5, rd4;
ld.global.v2.f64 {fd63, fd64}, [rd6];
mul.f64 fd67, fd63, fd35;
mul.f64 fd68, fd64, fd53;
mul.f64 fd69, fd63, fd53;
mul.f64 fd70, fd63, fd63;
mul.f64 fd71, fd64, fd64;
sub.f64 fd72, fd70, fd71;
mul.f64 fd73, fd64, fd63;
fma.rn.f64 fd74, fd64, fd63, fd73;
mul.f64 fd75, fd72, fd43;
mul.f64 fd76, fd74, fd61;
mul.f64 fd77, fd72, fd61;
ld.global.v2.f64 {fd78, fd79}, [rd6+10000];
mul.f64 fd82, fd78, fd44;
mul.f64 fd83, fd79, fd62;
mul.f64 fd84, fd78, fd62;
mul.f64 fd85, fd63, fd78;
mul.f64 fd86, fd64, fd79;
sub.f64 fd87, fd85, fd86;
mul.f64 fd88, fd63, fd79;
fma.rn.f64 fd89, fd64, fd78, fd88;
mul.f64 fd90, fd87, fd36;
mul.f64 fd91, fd89, fd54;
mul.f64 fd92, fd87, fd54;
barrier.sync 0;
mad.lo.s32 r9, r7, 80, r8;
add.f64 fd93, fd26, fd25;
add.f64 fd94, fd23, fd22;
st.shared.v2.f64 [r9], {fd94, fd93};
fma.rn.f64 fd95, fd64, fd35, fd69;
sub.f64 fd96, fd67, fd68;
st.shared.v2.f64 [r9+16], {fd96, fd95};
fma.rn.f64 fd97, fd74, fd43, fd77;
sub.f64 fd98, fd75, fd76;
st.shared.v2.f64 [r9+32], {fd98, fd97};
fma.rn.f64 fd99, fd79, fd44, fd84;
sub.f64 fd100, fd82, fd83;
st.shared.v2.f64 [r9+48], {fd100, fd99};
fma.rn.f64 fd101, fd89, fd36, fd92;
sub.f64 fd102, fd90, fd91;
st.shared.v2.f64 [r9+64], {fd102, fd101};
barrier.sync 0;
shl.b32 r10, r7, 6;
sub.s32 r11, r9, r10;
ld.shared.v2.f64 {fd103, fd104}, [r11];
ld.shared.v2.f64 {fd107, fd108}, [r11+10000];
ld.shared.v2.f64 {fd111, fd112}, [r11+20000];
ld.shared.v2.f64 {fd115, fd116}, [r11+30000];
ld.shared.v2.f64 {fd119, fd120}, [r11+40000];
add.f64 fd123, fd107, fd119;
add.f64 fd124, fd103, fd123;
add.f64 fd125, fd111, fd115;
add.f64 fd126, fd108, fd120;
add.f64 fd127, fd104, fd126;
add.f64 fd128, fd112, fd116;
fma.rn.f64 fd129, fd123, 0d3FD3C6EF372FE950, fd103;
mul.f64 fd130, fd125, 0d3FE9E3779B97F4A8;
sub.f64 fd131, fd129, fd130;
sub.f64 fd132, fd108, fd120;
mul.f64 fd133, fd132, 0d3FEE6F0E134454FF;
sub.f64 fd134, fd112, fd116;
mul.f64 fd135, fd134, 0dBFE2CF2304755A5E;
sub.f64 fd136, fd135, fd133;
sub.f64 fd137, fd131, fd136;
add.f64 fd138, fd136, fd131;
mul.f64 fd139, fd123, 0d3FE9E3779B97F4A8;
sub.f64 fd140, fd103, fd139;
fma.rn.f64 fd141, fd125, 0d3FD3C6EF372FE950, fd140;
mul.f64 fd142, fd132, 0d3FE2CF2304755A5E;
mul.f64 fd143, fd134, 0d3FEE6F0E134454FF;
sub.f64 fd144, fd143, fd142;
sub.f64 fd145, fd141, fd144;
add.f64 fd146, fd144, fd141;
fma.rn.f64 fd147, fd126, 0d3FD3C6EF372FE950, fd104;
mul.f64 fd148, fd128, 0d3FE9E3779B97F4A8;
sub.f64 fd149, fd147, fd148;
sub.f64 fd150, fd107, fd119;
mul.f64 fd151, fd150, 0d3FEE6F0E134454FF;
sub.f64 fd152, fd111, fd115;
mul.f64 fd153, fd152, 0dBFE2CF2304755A5E;
sub.f64 fd154, fd153, fd151;
add.f64 fd155, fd154, fd149;
sub.f64 fd156, fd149, fd154;
mul.f64 fd157, fd126, 0d3FE9E3779B97F4A8;
sub.f64 fd158, fd104, fd157;
fma.rn.f64 fd159, fd128, 0d3FD3C6EF372FE950, fd158;
mul.f64 fd160, fd150, 0d3FE2CF2304755A5E;
mul.f64 fd161, fd152, 0d3FEE6F0E134454FF;
sub.f64 fd162, fd161, fd160;
add.f64 fd163, fd162, fd159;
sub.f64 fd164, fd159, fd162;
mul.wide.u32 rd7, r7, -858993459;
shr.u64 rd8, rd7, 34;
cvt.u32.u64 r12, rd8;
mul.lo.s32 r13, r12, 5;
sub.s32 r14, r7, r13;
mul.wide.u32 rd9, r12, 16;
mov.u64 rd10, %12;
add.s64 rd11, rd10, rd9;
ld.global.v2.f64 {fd165, fd166}, [rd11];
mul.f64 fd169, fd165, fd137;
mul.f64 fd170, fd166, fd155;
mul.f64 fd171, fd165, fd155;
mul.f64 fd172, fd165, fd165;
mul.f64 fd173, fd166, fd166;
sub.f64 fd174, fd172, fd173;
mul.f64 fd175, fd166, fd165;
fma.rn.f64 fd176, fd166, fd165, fd175;
mul.f64 fd177, fd174, fd145;
mul.f64 fd178, fd176, fd163;
mul.f64 fd179, fd174, fd163;
ld.global.v2.f64 {fd180, fd181}, [rd11+2000];
mul.f64 fd184, fd180, fd146;
mul.f64 fd185, fd181, fd164;
mul.f64 fd186, fd180, fd164;
mul.f64 fd187, fd165, fd180;
mul.f64 fd188, fd166, fd181;
sub.f64 fd189, fd187, fd188;
mul.f64 fd190, fd165, fd181;
fma.rn.f64 fd191, fd166, fd180, fd190;
mul.f64 fd192, fd189, fd138;
mul.f64 fd193, fd191, fd156;
mul.f64 fd194, fd189, fd156;
shl.b32 r15, r14, 4;
add.s32 r16, r8, r15;
barrier.sync 0;
mad.lo.s32 r17, r12, 400, r16;
add.f64 fd195, fd128, fd127;
add.f64 fd196, fd125, fd124;
st.shared.v2.f64 [r17], {fd196, fd195};
fma.rn.f64 fd197, fd166, fd137, fd171;
sub.f64 fd198, fd169, fd170;
st.shared.v2.f64 [r17+80], {fd198, fd197};
fma.rn.f64 fd199, fd176, fd145, fd179;
sub.f64 fd200, fd177, fd178;
st.shared.v2.f64 [r17+160], {fd200, fd199};
fma.rn.f64 fd201, fd181, fd146, fd186;
sub.f64 fd202, fd184, fd185;
st.shared.v2.f64 [r17+240], {fd202, fd201};
fma.rn.f64 fd203, fd191, fd138, fd194;
sub.f64 fd204, fd192, fd193;
st.shared.v2.f64 [r17+320], {fd204, fd203};
barrier.sync 0;
ld.shared.v2.f64 {fd205, fd206}, [r11];
ld.shared.v2.f64 {fd209, fd210}, [r11+10000];
ld.shared.v2.f64 {fd213, fd214}, [r11+20000];
ld.shared.v2.f64 {fd217, fd218}, [r11+30000];
ld.shared.v2.f64 {fd221, fd222}, [r11+40000];
add.f64 fd225, fd209, fd221;
add.f64 fd226, fd205, fd225;
add.f64 fd227, fd213, fd217;
add.f64 fd228, fd210, fd222;
add.f64 fd229, fd206, fd228;
add.f64 fd230, fd214, fd218;
fma.rn.f64 fd231, fd225, 0d3FD3C6EF372FE950, fd205;
mul.f64 fd232, fd227, 0d3FE9E3779B97F4A8;
sub.f64 fd233, fd231, fd232;
sub.f64 fd234, fd210, fd222;
mul.f64 fd235, fd234, 0d3FEE6F0E134454FF;
sub.f64 fd236, fd214, fd218;
mul.f64 fd237, fd236, 0dBFE2CF2304755A5E;
sub.f64 fd238, fd237, fd235;
sub.f64 fd239, fd233, fd238;
add.f64 fd240, fd238, fd233;
mul.f64 fd241, fd225, 0d3FE9E3779B97F4A8;
sub.f64 fd242, fd205, fd241;
fma.rn.f64 fd243, fd227, 0d3FD3C6EF372FE950, fd242;
mul.f64 fd244, fd234, 0d3FE2CF2304755A5E;
mul.f64 fd245, fd236, 0d3FEE6F0E134454FF;
sub.f64 fd246, fd245, fd244;
sub.f64 fd247, fd243, fd246;
add.f64 fd248, fd246, fd243;
fma.rn.f64 fd249, fd228, 0d3FD3C6EF372FE950, fd206;
mul.f64 fd250, fd230, 0d3FE9E3779B97F4A8;
sub.f64 fd251, fd249, fd250;
sub.f64 fd252, fd209, fd221;
mul.f64 fd253, fd252, 0d3FEE6F0E134454FF;
sub.f64 fd254, fd213, fd217;
mul.f64 fd255, fd254, 0dBFE2CF2304755A5E;
sub.f64 fd256, fd255, fd253;
add.f64 fd257, fd256, fd251;
sub.f64 fd258, fd251, fd256;
mul.f64 fd259, fd228, 0d3FE9E3779B97F4A8;
sub.f64 fd260, fd206, fd259;
fma.rn.f64 fd261, fd230, 0d3FD3C6EF372FE950, fd260;
mul.f64 fd262, fd252, 0d3FE2CF2304755A5E;
mul.f64 fd263, fd254, 0d3FEE6F0E134454FF;
sub.f64 fd264, fd263, fd262;
add.f64 fd265, fd264, fd261;
sub.f64 fd266, fd261, fd264;
mul.wide.u32 rd12, r7, 1374389535;
shr.u64 rd13, rd12, 35;
cvt.u32.u64 r18, rd13;
mul.lo.s32 r19, r18, 25;
sub.s32 r20, r7, r19;
mul.wide.u32 rd14, r18, 16;
mov.u64 rd15, %13;
add.s64 rd16, rd15, rd14;
ld.global.v2.f64 {fd267, fd268}, [rd16];
mul.f64 fd271, fd267, fd239;
mul.f64 fd272, fd268, fd257;
mul.f64 fd273, fd267, fd257;
mul.f64 fd274, fd267, fd267;
mul.f64 fd275, fd268, fd268;
sub.f64 fd276, fd274, fd275;
mul.f64 fd277, fd268, fd267;
fma.rn.f64 fd278, fd268, fd267, fd277;
mul.f64 fd279, fd276, fd247;
mul.f64 fd280, fd278, fd265;
mul.f64 fd281, fd276, fd265;
ld.global.v2.f64 {fd282, fd283}, [rd16+400];
mul.f64 fd286, fd282, fd248;
mul.f64 fd287, fd283, fd266;
mul.f64 fd288, fd282, fd266;
mul.f64 fd289, fd267, fd282;
mul.f64 fd290, fd268, fd283;
sub.f64 fd291, fd289, fd290;
mul.f64 fd292, fd267, fd283;
fma.rn.f64 fd293, fd268, fd282, fd292;
mul.f64 fd294, fd291, fd240;
mul.f64 fd295, fd293, fd258;
mul.f64 fd296, fd291, fd258;
shl.b32 r21, r20, 4;
add.s32 r22, r8, r21;
barrier.sync 0;
mad.lo.s32 r23, r18, 2000, r22;
add.f64 fd297, fd230, fd229;
add.f64 fd298, fd227, fd226;
st.shared.v2.f64 [r23], {fd298, fd297};
fma.rn.f64 fd299, fd268, fd239, fd273;
sub.f64 fd300, fd271, fd272;
st.shared.v2.f64 [r23+400], {fd300, fd299};
fma.rn.f64 fd301, fd278, fd247, fd281;
sub.f64 fd302, fd279, fd280;
st.shared.v2.f64 [r23+800], {fd302, fd301};
fma.rn.f64 fd303, fd283, fd248, fd288;
sub.f64 fd304, fd286, fd287;
st.shared.v2.f64 [r23+1200], {fd304, fd303};
fma.rn.f64 fd305, fd293, fd240, fd296;
sub.f64 fd306, fd294, fd295;
st.shared.v2.f64 [r23+1600], {fd306, fd305};
barrier.sync 0;
ld.shared.v2.f64 {fd307, fd308}, [r11];
ld.shared.v2.f64 {fd311, fd312}, [r11+10000];
ld.shared.v2.f64 {fd315, fd316}, [r11+20000];
ld.shared.v2.f64 {fd319, fd320}, [r11+30000];
ld.shared.v2.f64 {fd323, fd324}, [r11+40000];
add.f64 fd327, fd311, fd323;
add.f64 fd328, fd307, fd327;
add.f64 fd329, fd315, fd319;
add.f64 fd330, fd312, fd324;
add.f64 fd331, fd308, fd330;
add.f64 fd332, fd316, fd320;
fma.rn.f64 fd333, fd327, 0d3FD3C6EF372FE950, fd307;
mul.f64 fd334, fd329, 0d3FE9E3779B97F4A8;
sub.f64 fd335, fd333, fd334;
sub.f64 fd336, fd312, fd324;
mul.f64 fd337, fd336, 0d3FEE6F0E134454FF;
sub.f64 fd338, fd316, fd320;
mul.f64 fd339, fd338, 0dBFE2CF2304755A5E;
sub.f64 fd340, fd339, fd337;
sub.f64 fd341, fd335, fd340;
add.f64 fd342, fd340, fd335;
mul.f64 fd343, fd327, 0d3FE9E3779B97F4A8;
sub.f64 fd344, fd307, fd343;
fma.rn.f64 fd345, fd329, 0d3FD3C6EF372FE950, fd344;
mul.f64 fd346, fd336, 0d3FE2CF2304755A5E;
mul.f64 fd347, fd338, 0d3FEE6F0E134454FF;
sub.f64 fd348, fd347, fd346;
sub.f64 fd349, fd345, fd348;
add.f64 fd350, fd348, fd345;
fma.rn.f64 fd351, fd330, 0d3FD3C6EF372FE950, fd308;
mul.f64 fd352, fd332, 0d3FE9E3779B97F4A8;
sub.f64 fd353, fd351, fd352;
sub.f64 fd354, fd311, fd323;
mul.f64 fd355, fd354, 0d3FEE6F0E134454FF;
sub.f64 fd356, fd315, fd319;
mul.f64 fd357, fd356, 0dBFE2CF2304755A5E;
sub.f64 fd358, fd357, fd355;
add.f64 fd359, fd358, fd353;
sub.f64 fd360, fd353, fd358;
mul.f64 fd361, fd330, 0d3FE9E3779B97F4A8;
sub.f64 fd362, fd308, fd361;
fma.rn.f64 fd363, fd332, 0d3FD3C6EF372FE950, fd362;
mul.f64 fd364, fd354, 0d3FE2CF2304755A5E;
mul.f64 fd365, fd356, 0d3FEE6F0E134454FF;
sub.f64 fd366, fd365, fd364;
add.f64 fd367, fd366, fd363;
sub.f64 fd368, fd363, fd366;
mul.wide.u32 rd17, r7, 274877907;
shr.u64 rd18, rd17, 35;
cvt.u32.u64 r24, rd18;
mul.lo.s32 r25, r24, 125;
sub.s32 r26, r7, r25;
mul.wide.u32 rd19, r24, 16;
mov.u64 rd20, %14;
add.s64 rd21, rd20, rd19;
ld.global.v2.f64 {fd369, fd370}, [rd21];
mul.f64 fd373, fd369, fd341;
mul.f64 fd374, fd370, fd359;
mul.f64 fd375, fd369, fd359;
mul.f64 fd376, fd369, fd369;
mul.f64 fd377, fd370, fd370;
sub.f64 fd378, fd376, fd377;
mul.f64 fd379, fd370, fd369;
fma.rn.f64 fd380, fd370, fd369, fd379;
mul.f64 fd381, fd378, fd349;
mul.f64 fd382, fd380, fd367;
mul.f64 fd383, fd378, fd367;
ld.global.v2.f64 {fd384, fd385}, [rd21+80];
mul.f64 fd388, fd384, fd350;
mul.f64 fd389, fd385, fd368;
mul.f64 fd390, fd384, fd368;
mul.f64 fd391, fd369, fd384;
mul.f64 fd392, fd370, fd385;
sub.f64 fd393, fd391, fd392;
mul.f64 fd394, fd369, fd385;
fma.rn.f64 fd395, fd370, fd384, fd394;
mul.f64 fd396, fd393, fd342;
mul.f64 fd397, fd395, fd360;
mul.f64 fd398, fd393, fd360;
shl.b32 r27, r26, 4;
add.s32 r28, r8, r27;
barrier.sync 0;
mad.lo.s32 r29, r24, 10000, r28;
add.f64 fd399, fd332, fd331;
add.f64 fd400, fd329, fd328;
st.shared.v2.f64 [r29], {fd400, fd399};
fma.rn.f64 fd401, fd370, fd341, fd375;
sub.f64 fd402, fd373, fd374;
st.shared.v2.f64 [r29+2000], {fd402, fd401};
fma.rn.f64 fd403, fd380, fd349, fd383;
sub.f64 fd404, fd381, fd382;
st.shared.v2.f64 [r29+4000], {fd404, fd403};
fma.rn.f64 fd405, fd385, fd350, fd390;
sub.f64 fd406, fd388, fd389;
st.shared.v2.f64 [r29+6000], {fd406, fd405};
fma.rn.f64 fd407, fd395, fd342, fd398;
sub.f64 fd408, fd396, fd397;
st.shared.v2.f64 [r29+8000], {fd408, fd407};
barrier.sync 0;
ld.shared.v2.f64 {fd409, fd410}, [r11];
ld.shared.v2.f64 {fd413, fd414}, [r11+10000];
ld.shared.v2.f64 {fd417, fd418}, [r11+20000];
ld.shared.v2.f64 {fd421, fd422}, [r11+30000];
ld.shared.v2.f64 {fd425, fd426}, [r11+40000];
add.f64 fd429, fd413, fd425;
add.f64 fd430, fd409, fd429;
add.f64 fd431, fd417, fd421;
add.f64 fd432, fd414, fd426;
add.f64 fd433, fd410, fd432;
add.f64 fd434, fd418, fd422;
fma.rn.f64 fd435, fd429, 0d3FD3C6EF372FE950, fd409;
mul.f64 fd436, fd431, 0d3FE9E3779B97F4A8;
sub.f64 fd437, fd435, fd436;
sub.f64 fd438, fd414, fd426;
mul.f64 fd439, fd438, 0d3FEE6F0E134454FF;
sub.f64 fd440, fd418, fd422;
mul.f64 fd441, fd440, 0dBFE2CF2304755A5E;
sub.f64 fd442, fd441, fd439;
mul.f64 fd443, fd429, 0d3FE9E3779B97F4A8;
sub.f64 fd444, fd409, fd443;
fma.rn.f64 fd445, fd431, 0d3FD3C6EF372FE950, fd444;
mul.f64 fd446, fd438, 0d3FE2CF2304755A5E;
mul.f64 fd447, fd440, 0d3FEE6F0E134454FF;
sub.f64 fd448, fd447, fd446;
fma.rn.f64 fd449, fd432, 0d3FD3C6EF372FE950, fd410;
mul.f64 fd450, fd434, 0d3FE9E3779B97F4A8;
sub.f64 fd451, fd449, fd450;
sub.f64 fd452, fd413, fd425;
mul.f64 fd453, fd452, 0d3FEE6F0E134454FF;
sub.f64 fd454, fd417, fd421;
mul.f64 fd455, fd454, 0dBFE2CF2304755A5E;
sub.f64 fd456, fd455, fd453;
mul.f64 fd457, fd432, 0d3FE9E3779B97F4A8;
sub.f64 fd458, fd410, fd457;
fma.rn.f64 fd459, fd434, 0d3FD3C6EF372FE950, fd458;
mul.f64 fd460, fd452, 0d3FE2CF2304755A5E;
mul.f64 fd461, fd454, 0d3FEE6F0E134454FF;
sub.f64 fd462, fd461, fd460;
add.f64 %1, fd434, fd433;
add.f64 %0, fd431, fd430;
add.f64 %3, fd456, fd451;
sub.f64 %2, fd437, fd442;
add.f64 %5, fd462, fd459;
sub.f64 %4, fd445, fd448;
sub.f64 %7, fd459, fd462;
add.f64 %6, fd448, fd445;
sub.f64 %9, fd451, fd456;
add.f64 %8, fd442, fd437;
})"
     : "=d"(rmem[0].x), "=d"(rmem[0].y), "=d"(rmem[1].x), "=d"(rmem[1].y), "=d"(rmem[2].x), "=d"(rmem[2].y), "=d"(rmem[3].x), "=d"(rmem[3].y), "=d"(rmem[4].x), "=d"(rmem[4].y): "r"(smem), "l"(lut_dp_5_3125), "l"(lut_dp_5_625), "l"(lut_dp_5_125), "l"(lut_dp_5_25), "d"(rmem[0].x), "d"(rmem[0].y), "d"(rmem[1].x), "d"(rmem[1].y), "d"(rmem[1].y), "d"(rmem[2].x), "d"(rmem[2].y), "d"(rmem[2].y), "d"(rmem[3].x), "d"(rmem[3].y), "d"(rmem[4].x), "d"(rmem[4].y)
     : "memory");
}


#endif
